{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# This notebook assumes to be running from your FireCARES VM (eg. python manage.py shell_plus --notebook --no-browser)\n",
    "\n",
    "import sys\n",
    "import os\n",
    "import time\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "sys.path.insert(0, os.path.realpath('..'))\n",
    "import folium\n",
    "import django\n",
    "django.setup()\n",
    "from django.db import connections\n",
    "from pretty import pprint\n",
    "from firecares.firestation.models import FireDepartment, FireStation, NFIRSStatistic\n",
    "from django.db.models import Avg, Max, Min, Q\n",
    "from django.contrib.gis.geos import GEOSGeometry\n",
    "from IPython.display import display\n",
    "from firecares.utils import lenient_summation, dictfetchall\n",
    "pd.set_option(\"display.max_rows\",100)\n",
    "\n",
    "def display_geom(geom):\n",
    "    _map = folium.Map(location=[geom.centroid.y, geom.centroid.x],\n",
    "                      tiles='Stamen Toner')\n",
    "    _map.choropleth(geo_str=geom.geojson, line_weight=0, fill_opacity=0.2, fill_color='green')\n",
    "    ll = geom.extent[1::-1]\n",
    "    ur = geom.extent[3:1:-1]\n",
    "    _map.fit_bounds([ll, ur])\n",
    "\n",
    "    return _map\n",
    "\n",
    "fd = FireDepartment.objects.get(id=95982)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Predictions 2015 csv processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv('/firecares/predictions.2015.csv')\n",
    "cols = ['lr_fire', 'mr_fire', 'h.fire', 'lr_inj', 'mr_inj', 'h.inj', 'lr_death', 'mr_death', 'h.death', 'lr_size_2', 'mr_size_2', 'h.size2', 'lr_size_3', 'mr_size_3', 'h.size3']\n",
    "# Find stats for Richmond, VA\n",
    "richmond = df[df['fd_id'] == 93345]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Sum all Richmond rows\n",
    "df2 = richmond.groupby(['fd_id', 'state'])[cols].sum()\n",
    "df.groupby(['state'])[cols].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# High standard deviation for high-risk-level fire values\n",
    "display(richmond.std())\n",
    "display(richmond.mean())\n",
    "display(richmond.sum())\n",
    "display(richmond.max())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Actuals from NFIRS, average of residential structure fires over years (for high structure risk level)\n",
    "\n",
    "pprint(list(fd.nfirsstatistic_set.filter(metric='residential_structure_fires', year__gte=2010, level=4).values('count', 'year')))\n",
    "\n",
    "high_avg = fd.nfirsstatistic_set.filter(metric='residential_structure_fires', year__gte=2010, level=4).aggregate(Avg('count')).get('count__avg')\n",
    "print 'Actual average over high-risk structure types per year: {}'.format(high_avg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# The current predicted # of fires for high risk structures\n",
    "print 'Predicted # fires for high-risk structures: {}'.format(sum([df2['h.fire'][0], df2['h.size2'][0], df2['h.size3'][0]]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Displayed value verification on FireCARES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Verify \"Number of fires -> Average since 2010\" displayed values\n",
    "\n",
    "low = fd.nfirsstatistic_set.filter(metric='residential_structure_fires', year__gte=2010, level=1).aggregate(Avg('count')).get('count__avg')\n",
    "metrics = fd.metrics.residential_fires_3_year_avg\n",
    "\n",
    "assert low == metrics.low\n",
    "\n",
    "display(low)\n",
    "display(metrics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Verify predicted deaths and injuries for \"Low\" structure hazard levels displayed values\n",
    "\n",
    "low = df2['lr_death'][0] + df2['lr_inj'][0]\n",
    "assert abs(low - fd.metrics.deaths_and_injuries_sum.low) < 0.0001\n",
    "display(fd.metrics.deaths_and_injuries_sum)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Verify sum of death and injuries over all risk levels\n",
    "v = sum(filter(lambda x: x >= 0, [df2['lr_death'][0], df2['lr_inj'][0], df2['mr_death'][0], df2['mr_inj'][0], df2['h.death'][0], df2['h.inj'][0]]))\n",
    "assert abs(v - fd.metrics.deaths_and_injuries_sum.all) < 0.0001"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Structure count graph vs heatmap N/As"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Building fires counts/locations\n",
    "fd = FireDepartment.objects.get(id=93345)\n",
    "df = pd.read_csv('/firecares/93345-building-fires.csv')\n",
    "display(df.count())\n",
    "display(df)\n",
    "display(df.replace(np.nan, 'Unknown').groupby('risk_category').agg('count')[['alarm']].rename(columns={'alarm': 'Count'}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from django.db import connections\n",
    "from django.contrib.gis.geos import GEOSGeometry\n",
    "\n",
    "with connections['nfirs'].cursor() as c:\n",
    "    q = \"\"\"SELECT ST_Multi(ST_Union(bg.geom))\n",
    "        FROM nist.tract_years ty\n",
    "        INNER JOIN census_block_groups_2010 bg\n",
    "        ON ty.tr10_fid = ('14000US'::text || \"substring\"((bg.geoid10)::text, 0, 12))\n",
    "        WHERE ty.fc_dept_id = %(id)s\n",
    "        GROUP BY ty.fc_dept_id\"\"\"\n",
    "\n",
    "    c.execute(q, {'id': 93345})\n",
    "    geom = GEOSGeometry(c.fetchone()[0])\n",
    "\n",
    "assert geom == fd.owned_tracts_geom"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Coverage for \"owned\" census tracts\n",
    "_map = folium.Map(zoom_start=12,\n",
    "                  location=[geom.centroid.y, geom.centroid.x],\n",
    "                  tiles='Stamen Toner')\n",
    "# Green background is the \"owned\" censuses for the FD based on response to incidents\n",
    "_map.choropleth(geo_str=fd.owned_tracts_geom.geojson, line_weight=0, fill_opacity=0.2, fill_color='green')\n",
    "# Red outline is jurisdiction \n",
    "_map.choropleth(geo_str=fd.geom.geojson, fill_opacity=0, line_weight=10, line_color='red')\n",
    "folium.LayerControl().add_to(_map)\n",
    "\n",
    "colors = {'Low': 'green', 'Medium': 'yellow', 'High': 'red', 'nan': 'gray'}\n",
    "for r in df[['x', 'y', 'risk_category']].values:\n",
    "    if r[0] and r[1]:\n",
    "        folium.CircleMarker(location=[float(r[1]), float(r[0])],\n",
    "                       fill_color=colors[str(r[2])], radius=200, fill_opacity=0.4, popup='{}, {}'.format(r[1], r[0])).add_to(_map)\n",
    "\n",
    "display(_map)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Structure/parcel counts over coverage area by hazard level\n",
    "\n",
    "q = \"\"\"SELECT sum(case when l.risk_category = 'Low' THEN 1 ELSE 0 END) as low,\n",
    "        sum(CASE WHEN l.risk_category = 'Medium' THEN 1 ELSE 0 END) as medium,\n",
    "        sum(CASE WHEN l.risk_category = 'High' THEN 1 ELSE 0 END) high,\n",
    "        sum(CASE WHEN l.risk_category is null THEN 1 ELSE 0 END) as unknown\n",
    "    FROM parcel_risk_category_local l\n",
    "    JOIN (SELECT ST_SetSRID(%(owned_geom)s::geometry, 4326) as owned_geom) x\n",
    "    ON owned_geom && l.wkb_geometry\n",
    "    WHERE ST_Intersects(owned_geom, l.wkb_geometry)\"\"\"\n",
    "\n",
    "with connections['nfirs'].cursor() as c:\n",
    "    c.execute(q, {'owned_geom': fd.owned_tracts_geom.hex})\n",
    "    res = dictfetchall(c)\n",
    "    \n",
    "display(res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Find multiple incidents at each location\n",
    "df2 = df.groupby(['x', 'y']).agg('count').sort('alarm', ascending=False)\n",
    "dup_df = df2[df2['alarm'] > 1][['alarm']]\n",
    "display(dup_df)\n",
    "print 'Location count w/ multiple incidents: {}'.format(len(dup_df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "q = \"\"\"\n",
    "select alarm, a.inc_type, alarms,ff_death, oth_death, ST_X(geom) as x, st_y(geom) as y, COALESCE(b.risk_category, 'Unknown') as risk_category, b.parcel_id\n",
    "from buildingfires a\n",
    "left join (SELECT * FROM\n",
    "            (SELECT state, fdid, inc_date, inc_no, exp_no, geom, b.parcel_id, b.risk_category, ROW_NUMBER() OVER (PARTITION BY state, fdid, inc_date, inc_no, exp_no, geom ORDER BY st_distance(st_centroid(b.wkb_geometry), a.geom)) AS r\n",
    "            FROM (select * from incidentaddress where state=%(state)s and fdid=%(fdid)s) a\n",
    "            left join parcel_risk_category_local b on a.geom && b.wkb_geometry) x\n",
    "            WHERE x.r = 1) b\n",
    "           using (state, inc_date, exp_no, fdid, inc_no) where state=%(state)s and fdid=%(fdid)s\n",
    "           order by a.alarm\"\"\"\n",
    "\n",
    "odf = pd.read_sql_query(q, connections['nfirs'], params={'fdid': fd.fdid, 'state': fd.state})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Map parcel misses\n",
    "\n",
    "q = \"\"\"\n",
    "select alarm, bf.inc_type, bf.alarms, ff_death, oth_death, ST_X(ia.geom) as x, ST_Y(ia.geom) as y, COALESCE(rc.risk_category, 'Unknown') as risk_category, rc.parcel_id\n",
    "from buildingfires bf\n",
    "left join incidentaddress ia\n",
    "using (state, inc_date, exp_no, fdid, inc_no)\n",
    "left join parcel_risk_category_local rc\n",
    "on rc.parcel_id = ia.parcel_id\n",
    "where bf.state=%(state)s and bf.fdid=%(fdid)s\n",
    "order by bf.alarm\"\"\"\n",
    "\n",
    "df = pd.read_sql_query(q, connections['nfirs'], params={'fdid': fd.fdid, 'state': fd.state})\n",
    "\n",
    "# Display building fires that don't have a parcel (parcel miss)\n",
    "misses = df[df['parcel_id'].isnull()]\n",
    "print 'Misses: {}'.format(misses.shape[0])\n",
    "\n",
    "import folium\n",
    "_map = folium.Map(zoom_start=12,\n",
    "                  location=[fd.geom.centroid.y, fd.geom.centroid.x],\n",
    "                  tiles='Stamen Toner')\n",
    "\n",
    "_map.choropleth(geo_str=fd.geom.geojson, line_weight=0, fill_opacity=0.1, fill_color='green')\n",
    "folium.LayerControl().add_to(_map)\n",
    "\n",
    "for r in misses[['x', 'y']].values:\n",
    "    if r[0] and r[1]:\n",
    "        folium.CircleMarker(location=[float(r[1]), float(r[0])],\n",
    "                       fill_color='gray', radius=20, fill_opacity=0.4, popup='{}, {}'.format(r[1], r[0])).add_to(_map)\n",
    "\n",
    "display(_map)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Parcel coverage for Richmond w/ parcel miss incidents\n",
    "q = \"\"\"\n",
    "select ST_Union(p.wkb_geometry)\n",
    "from parcel_risk_category_local p\n",
    "where ST_Intersects(p.wkb_geometry, ST_SetSRID(%(owned_geom)s::geometry, 4326))\n",
    "\"\"\"\n",
    "\n",
    "parcels = pd.read_sql_query(q, connections['nfirs'], params={'owned_geom': fd.owned_tracts_geom.hex})\n",
    "g = GEOSGeometry(parcels.values[0][0])\n",
    "_map = display_geom(g)\n",
    "\n",
    "pts = 0\n",
    "for r in misses[['x', 'y']].values:\n",
    "    if r[0] and r[1]:\n",
    "        pts += 1\n",
    "        folium.Marker(location=[float(r[1]), float(r[0])], popup='{}, {}'.format(r[1], r[0])).add_to(_map)\n",
    "display(_map)\n",
    "display(pts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Count missing geocodes on all priority departments\n",
    "from firecares.firestation.models import FireDepartment\n",
    "priorities = list(FireDepartment.priority_departments.all().values('fdid', 'state', 'name'))\n",
    "\n",
    "q = \"\"\"\n",
    "select fdid, state, sum(CASE WHEN geom is null THEN 1 ELSE 0 END) as null_count, count(1), sum(CASE WHEN geom is null THEN 1 ELSE 0 END) / count(1)::decimal * 100.0 as percent_null\n",
    "from incidentaddress\n",
    "where fdid = %(fdid)s and state = %(state)s\n",
    "group by fdid, state\n",
    "\"\"\"\n",
    "\n",
    "df = pd.DataFrame(columns=['fdid', 'state', 'null_count', 'count', 'percent_null'])\n",
    "\n",
    "for i in priorities:\n",
    "    fdid, state, name = i.values()\n",
    "    df = df.append(pd.read_sql_query(q, connections['nfirs'], params={'fdid': fdid, 'state': state}))\n",
    "    \n",
    "df = df.sort('percent_null', ascending=False)\n",
    "    \n",
    "display(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get parcel counts for departments based on owned census tracts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "fd = FireDepartment.objects.get(id=95982)\n",
    "fd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "q = \"\"\"SELECT ST_Multi(ST_Union(bg.geom))\n",
    "        FROM nist.tract_years ty\n",
    "        INNER JOIN census_block_groups_2010 bg\n",
    "        ON ty.tr10_fid = ('14000US'::text || \"substring\"((bg.geoid10)::text, 0, 12))\n",
    "        WHERE ty.fc_dept_id = %(id)s\n",
    "        GROUP BY ty.fc_dept_id\"\"\"\n",
    "\n",
    "with connections['nfirs'].cursor() as c:\n",
    "    c.execute(q, {'id': fd.id})\n",
    "    geom = c.fetchone()[0]\n",
    "    fd.owned_tracts_geom = geom\n",
    "    fd.save()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "display(display_geom(fd.owned_tracts_geom))\n",
    "display(fd.owned_tracts_geom.area)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "q = \"\"\"SELECT sum(case when l.risk_category = 'Low' THEN 1 ELSE 0 END) as low,\n",
    "        sum(CASE WHEN l.risk_category = 'Medium' THEN 1 ELSE 0 END) as medium,\n",
    "        sum(CASE WHEN l.risk_category = 'High' THEN 1 ELSE 0 END) high,\n",
    "        sum(CASE WHEN l.risk_category is null THEN 1 ELSE 0 END) as unknown\n",
    "    FROM parcel_risk_category_local l\n",
    "    JOIN (SELECT ST_SetSRID(%(owned_geom)s::geometry, 4326) as owned_geom) x\n",
    "    ON owned_geom && l.wkb_geometry\n",
    "    WHERE ST_Intersects(owned_geom, l.wkb_geometry)\"\"\"\n",
    "\n",
    "with connections['nfirs'].cursor() as c:\n",
    "    c.execute(q, {'owned_geom': fd.owned_tracts_geom.hex})\n",
    "    res = dictfetchall(c)\n",
    "    \n",
    "display(res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "q = \"\"\"\n",
    "select ST_Union(p.wkb_geometry)\n",
    "from parcel_risk_category_local p\n",
    "where ST_Intersects(p.wkb_geometry, ST_SetSRID(%(owned_geom)s::geometry, 4326))\n",
    "\"\"\"\n",
    "\n",
    "parcels = pd.read_sql_query(q, connections['nfirs'], params={'owned_geom': fd.owned_tracts_geom.hex})\n",
    "g = GEOSGeometry(parcels.values[0][0])\n",
    "display_geom(g)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "parcel_coverage_area = g.area\n",
    "owned_tracts = fd.owned_tracts_geom.area\n",
    "print '% coverage of owned census tracts by parcels: {}'.format(parcel_coverage_area / owned_tracts)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
